#------------------------------------------------------------------------------
# aggregate to state or county units
#==============================================================================

load_library = c('bit64','data.table','fst','future.apply','stringr','logger','vroom')
invisible(lapply(load_library, function(x) library(x, character.only=TRUE, quietly= TRUE)))

bucket = '/N/project/iuni_doctorshopping'

# read arguments from Snakefile
args=commandArgs(TRUE)
infile_reg = args[[1]]
infile_first_op = args[[2]]
outfile = args[[3]]
geo_unit = args[[4]]

logger::log_info('now reading data ', infile_reg)

pat_opioid_first = read_fst(infile_first_op, as.data.table = TRUE)

if (grepl('W',basename(infile_reg))){
	year_week = str_extract(infile_reg,"\\d{4}W\\d{2}")
	year_quarter = zoo::as.yearqtr(as.Date(tsibble::yearweek(year_week)))
	year = as.numeric(str_extract(year_quarter,'\\d{4}'))
	quarter = as.numeric(gsub('Q','',str_extract(year_quarter,'Q\\d{1}')))
	year_quarter_num = year * 4 + quarter 
} else {
	stop('not yet implemented')	
}

pat_opioid_first[,year := as.numeric(year)]
pat_opioid_first[,quarter := as.numeric(quarter)]

previous_op_taker = unique(pat_opioid_first[(4 * year + quarter) < year_quarter_num,'PATID'])
previous_op_taker[, op_naive := 0]

mbr = read_fst(infile_reg, as.data.table = TRUE)
setnames(mbr, 'DIVISION', 'division')

mbr[, opioid := ifelse(opioids > 0 , 1, 0)]

# merge with opioid prescription to define opioid_naive
mbr = merge(mbr, previous_op_taker,by='PATID',all.x=TRUE)
mbr[is.na(op_naive) & opioids > 0,op_naive := 1]

# identify opioid prescription + therpay together 
mbr[, opioid_therapy := as.integer(opioids > 0 & therapy > 0)]
mbr[, opioid_only := as.integer(opioids > 0 & therapy == 0)]
mbr[, therapy_only := as.integer(opioids == 0 & therapy > 0)]

mbr[, targetpain := ifelse(backpain == 1 | neckpain == 1 | limbpain == 1, 1L, 0L)]

select_var = c('cancer','palliative','od','ord','pain',
	'targetpain','backpain','neckpain','limbpain',
	'sum_opioid_quantity','sum_opioid_days','sum_opioid_mme',
	'op_naive','opioids','therapy','opioid_therapy','opioid_only','therapy_only',
	'mat','methadone','naltrexone','n_opioid_prescription')

logger::log_info('now aggregating data ', infile_reg)

N_mbr = mbr[, .(N=.N), by=geo_unit]

mbr[, sum_opioid_quantity := as.numeric(sum_opioid_quantity)]
mbr[, sum_opioid_days := as.numeric(sum_opioid_days)]
mbr[, sum_opioid_mme := as.numeric(sum_opioid_mme)]

mean_mbr = mbr[, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(mean_mbr) = paste0('all_mean_',names(mean_mbr))
names(mean_mbr)[1] = geo_unit

sum_mbr = mbr[, lapply(.SD, sum, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(sum_mbr) = paste0('all_sum_',names(sum_mbr))
names(sum_mbr)[1] = geo_unit

pain_mean_mbr = mbr[pain==1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(pain_mean_mbr) = paste0('pain_mean_',names(pain_mean_mbr))
names(pain_mean_mbr)[1] = geo_unit

pain_sum_mbr = mbr[pain==1, lapply(.SD, sum, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(pain_sum_mbr) = paste0('pain_sum_',names(pain_sum_mbr))
names(pain_sum_mbr)[1] = geo_unit

backpain_mean_mbr = mbr[backpain==1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(backpain_mean_mbr) = paste0('backpain_mean_',names(backpain_mean_mbr))
names(backpain_mean_mbr)[1] = geo_unit

backpain_sum_mbr = mbr[backpain==1, lapply(.SD, sum, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(backpain_sum_mbr) = paste0('backpain_sum_',names(backpain_sum_mbr))
names(backpain_sum_mbr)[1] = geo_unit

limbpain_mean_mbr = mbr[limbpain==1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(limbpain_mean_mbr) = paste0('limbpain_mean_',names(limbpain_mean_mbr))
names(limbpain_mean_mbr)[1] = geo_unit

limbpain_sum_mbr = mbr[limbpain==1, lapply(.SD, sum, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(limbpain_sum_mbr) = paste0('limbpain_sum_',names(limbpain_sum_mbr))
names(limbpain_sum_mbr)[1] = geo_unit

neckpain_mean_mbr = mbr[neckpain==1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(neckpain_mean_mbr) = paste0('neckpain_mean_',names(neckpain_mean_mbr))
names(neckpain_mean_mbr)[1] = geo_unit

neckpain_sum_mbr = mbr[neckpain==1, lapply(.SD, sum, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(neckpain_sum_mbr) = paste0('neckpain_sum_',names(neckpain_sum_mbr))
names(neckpain_sum_mbr)[1] = geo_unit

targetpain_mean_mbr = mbr[targetpain==1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(targetpain_mean_mbr) = paste0('targetpain_mean_',names(targetpain_mean_mbr))
names(targetpain_mean_mbr)[1] = geo_unit

targetpain_sum_mbr = mbr[targetpain==1, lapply(.SD, sum, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(targetpain_sum_mbr) = paste0('targetpain_sum_',names(targetpain_sum_mbr))
names(targetpain_sum_mbr)[1] = geo_unit

ord_mean_mbr = mbr[ord==1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(ord_mean_mbr) = paste0('ord_mean_',names(ord_mean_mbr))
names(ord_mean_mbr)[1] = geo_unit

ord_sum_mbr = mbr[ord==1, lapply(.SD, sum, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(ord_sum_mbr) = paste0('ord_sum_',names(ord_sum_mbr))
names(ord_sum_mbr)[1] = geo_unit

od_mean_mbr = mbr[od==1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(od_mean_mbr) = paste0('od_mean_',names(od_mean_mbr))
names(od_mean_mbr)[1] = geo_unit

od_sum_mbr = mbr[od==1, lapply(.SD, sum, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(od_sum_mbr) = paste0('od_sum_',names(od_sum_mbr))
names(od_sum_mbr)[1] = geo_unit

opioid_mean_mbr = mbr[opioid==1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(opioid_mean_mbr) = paste0('opioid_mean_',names(opioid_mean_mbr))
names(opioid_mean_mbr)[1] = geo_unit

opioid_sum_mbr = mbr[opioid==1, lapply(.SD, sum, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(opioid_sum_mbr) = paste0('opioid_sum_',names(opioid_sum_mbr))
names(opioid_sum_mbr)[1] = geo_unit

op_naive_mean_mbr = mbr[op_naive==1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(op_naive_mean_mbr) = paste0('op_naive_mean_',names(op_naive_mean_mbr))
names(op_naive_mean_mbr)[1] = geo_unit

op_naive_sum_mbr = mbr[op_naive==1, lapply(.SD, sum, na.rm=TRUE), by=geo_unit, .SDcols=select_var] 
names(op_naive_sum_mbr) = paste0('op_naive_sum_',names(op_naive_sum_mbr))
names(op_naive_sum_mbr)[1] = geo_unit

# add transition here 
trans_opioid_zero  = mbr[opioid_t0 == 0, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=c('opioid_t1','therapy_t1')]
names(trans_opioid_zero)[2:3] = paste0('opioid0_',names(trans_opioid_zero)[2:3])
trans_opioid_one   = mbr[opioid_t0 == 1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=c('opioid_t1','therapy_t1')]
names(trans_opioid_one)[2:3] = paste0('opioid1_',names(trans_opioid_one)[2:3])
trans_therapy_zero = mbr[therapy_t0 == 0, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=c('opioid_t1','therapy_t1')]
names(trans_therapy_zero)[2:3] = paste0('therapy0_',names(trans_therapy_zero)[2:3])
trans_therapy_one  = mbr[therapy_t0 == 1, lapply(.SD, mean, na.rm=TRUE), by=geo_unit, .SDcols=c('opioid_t1','therapy_t1')]
names(trans_therapy_one)[2:3] = paste0('therapy1_',names(trans_therapy_one)[2:3])

geo_mbr = merge(N_mbr, sum_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, mean_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, pain_mean_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, backpain_mean_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, neckpain_mean_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, limbpain_mean_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, targetpain_mean_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, ord_mean_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, od_mean_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, opioid_mean_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, op_naive_mean_mbr, by=geo_unit, all=TRUE)

geo_mbr = merge(geo_mbr, pain_sum_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, backpain_sum_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, neckpain_sum_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, limbpain_sum_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, targetpain_sum_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, ord_sum_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, od_sum_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, opioid_sum_mbr, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, op_naive_sum_mbr, by=geo_unit, all=TRUE)

geo_mbr = merge(geo_mbr, trans_opioid_zero , by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, trans_opioid_one  , by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, trans_therapy_zero, by=geo_unit, all=TRUE)
geo_mbr = merge(geo_mbr, trans_therapy_one , by=geo_unit, all=TRUE)

pre_condition_list = c('all','targetpain','pain','ord','od','opioid','op_naive')

for (var in pre_condition_list){
	geo_mbr[[paste0(var,'_','sum_sum_opioid_quantity')]] =  as.numeric(geo_mbr[[paste0(var,'_','sum_sum_opioid_quantity')]])
	geo_mbr[[paste0(var,'_','sum_sum_opioid_days')]] =  as.numeric(geo_mbr[[paste0(var,'_','sum_sum_opioid_days')]])
	geo_mbr[[paste0(var,'_','sum_sum_opioid_mme')]] =  as.numeric(geo_mbr[[paste0(var,'_','sum_sum_opioid_mme')]])

	geo_mbr[[paste0(var,'_','mean_sum_opioid_quantity')]] =  as.numeric(geo_mbr[[paste0(var,'_','mean_sum_opioid_quantity')]])
	geo_mbr[[paste0(var,'_','mean_sum_opioid_days')]] =  as.numeric(geo_mbr[[paste0(var,'_','mean_sum_opioid_days')]])
	geo_mbr[[paste0(var,'_','mean_sum_opioid_mme')]] =  as.numeric(geo_mbr[[paste0(var,'_','mean_sum_opioid_mme')]])
}


logger::log_info('now writing data ', outfile)
fwrite(geo_mbr, outfile)

